#ifndef CUFFTDX_FFT_27_FP64_FWD_PTX_HPP
#define CUFFTDX_FFT_27_FP64_FWD_PTX_HPP



// Specialization of cufftdx_private_function for <504, double, 1>.
//
// The whole body is a single inline-PTX block consisting only of f64
// add/sub/mul/fma instructions. It performs no loads, stores or barriers, and
// the `smem` parameter is never referenced by the PTX.
//
// Data flow: the 27 complex values rmem[0..26] are read as inputs and all 27
// are overwritten with the results.
//   - Outputs %0..%53:    rmem[k].x is %(2k),    rmem[k].y is %(2k+1).
//   - Inputs  %54..%107:  rmem[k].x is %(54+2k), rmem[k].y is %(55+2k).
//   - Inputs  %108..%124: second bindings of the .y members of elements
//     19, 10, 1, 13, 22, 4, 25, 16, 7, 20, 11, 2, 23, 14, 5, 17, 8 (in that
//     order). The PTX reads those imaginary parts through these operands.
//
// Ordering constraint: every input operand is read before the first output
// operand is written (outputs are only written in the trailing block of
// add/sub instructions that target %0..%53). The output constraints are plain
// "=d" with no early-clobber marker, so that ordering should be kept if the
// PTX is ever edited or regenerated.
//
// Constants: 0d3FE0000000000000 is 0.5 and 0d3FEBB67AE8584CAA is approximately
// 0.8660254 (sqrt(3)/2). The recurring "sum, 0.5 * sum, 0.866 * difference"
// pattern looks like a 3-point butterfly; together with the include-guard name
// (FFT_27_FP64_FWD) this is presumably a 27-point forward double-precision FFT
// done entirely in registers -- TODO confirm against the generator. The meaning
// of the template arguments 504 and 1 is not visible in this file.
template<> __forceinline__ __device__ void cufftdx_private_function<504, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .f64 fd<796>;
.reg .b64 rd<5>;
add.f64 fd109, %72, %90;
add.f64 fd110, %54, fd109;
mul.f64 fd113, fd109, 0d3FE0000000000000;
sub.f64 fd114, %54, fd113;
add.f64 fd789, %73, %91;
sub.f64 fd115, %73, %91;
mul.f64 fd116, fd115, 0d3FEBB67AE8584CAA;
add.f64 fd117, fd116, fd114;
sub.f64 fd118, fd114, fd116;
add.f64 fd788, %55, fd789;
mul.f64 fd119, fd789, 0d3FE0000000000000;
sub.f64 fd120, %55, fd119;
sub.f64 fd121, %72, %90;
mul.f64 fd122, fd121, 0d3FEBB67AE8584CAA;
sub.f64 fd123, fd120, fd122;
add.f64 fd124, fd122, fd120;
add.f64 fd125, %78, %96;
add.f64 fd126, %60, fd125;
mul.f64 fd129, fd125, 0d3FE0000000000000;
sub.f64 fd130, %60, fd129;
add.f64 fd787, %79, %97;
sub.f64 fd131, %79, %97;
mul.f64 fd132, fd131, 0d3FEBB67AE8584CAA;
add.f64 fd133, fd132, fd130;
sub.f64 fd134, fd130, fd132;
add.f64 fd786, %61, fd787;
mul.f64 fd135, fd787, 0d3FE0000000000000;
sub.f64 fd136, %61, fd135;
sub.f64 fd137, %78, %96;
mul.f64 fd138, fd137, 0d3FEBB67AE8584CAA;
sub.f64 fd139, fd136, fd138;
add.f64 fd140, fd138, fd136;
add.f64 fd141, %84, %102;
add.f64 fd142, %66, fd141;
mul.f64 fd145, fd141, 0d3FE0000000000000;
sub.f64 fd146, %66, fd145;
add.f64 fd785, %85, %103;
sub.f64 fd147, %85, %103;
mul.f64 fd148, fd147, 0d3FEBB67AE8584CAA;
add.f64 fd149, fd148, fd146;
sub.f64 fd150, fd146, fd148;
add.f64 fd784, %67, fd785;
mul.f64 fd151, fd785, 0d3FE0000000000000;
sub.f64 fd152, %67, fd151;
sub.f64 fd153, %84, %102;
mul.f64 fd154, fd153, 0d3FEBB67AE8584CAA;
sub.f64 fd155, fd152, fd154;
add.f64 fd156, fd154, fd152;
mul.f64 fd158, fd139, 0dBFE491B7523C161D;
mul.f64 fd783, fd133, 0d3FE8836FA2CF5039;
sub.f64 fd159, fd783, fd158;
mul.f64 fd160, fd139, 0d3FE8836FA2CF5039;
fma.rn.f64 fd161, fd133, 0dBFE491B7523C161D, fd160;
mul.f64 fd163, fd155, 0dBFEF838B8C811C17;
mul.f64 fd782, fd149, 0d3FC63A1A7E0B738A;
sub.f64 fd164, fd782, fd163;
mul.f64 fd165, fd155, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd166, fd149, 0dBFEF838B8C811C17, fd165;
mul.f64 fd168, fd140, 0dBFEF838B8C811C17;
mul.f64 fd781, fd134, 0d3FC63A1A7E0B738A;
sub.f64 fd169, fd781, fd168;
mul.f64 fd170, fd140, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd171, fd134, 0dBFEF838B8C811C17, fd170;
mul.f64 fd173, fd156, 0dBFD5E3A8748A0BF5;
mul.f64 fd780, fd150, 0dBFEE11F642522D1C;
sub.f64 fd174, fd780, fd173;
mul.f64 fd175, fd156, 0dBFEE11F642522D1C;
fma.rn.f64 fd176, fd150, 0dBFD5E3A8748A0BF5, fd175;
add.f64 fd177, fd126, fd142;
add.f64 fd178, fd110, fd177;
mul.f64 fd181, fd177, 0d3FE0000000000000;
sub.f64 fd182, fd110, fd181;
add.f64 fd779, fd786, fd784;
sub.f64 fd183, fd786, fd784;
mul.f64 fd184, fd183, 0d3FEBB67AE8584CAA;
add.f64 fd185, fd184, fd182;
sub.f64 fd186, fd182, fd184;
add.f64 fd778, fd788, fd779;
mul.f64 fd187, fd779, 0d3FE0000000000000;
sub.f64 fd188, fd788, fd187;
sub.f64 fd189, fd126, fd142;
mul.f64 fd190, fd189, 0d3FEBB67AE8584CAA;
sub.f64 fd191, fd188, fd190;
add.f64 fd192, fd190, fd188;
add.f64 fd193, fd159, fd164;
add.f64 fd194, fd117, fd193;
mul.f64 fd197, fd193, 0d3FE0000000000000;
sub.f64 fd198, fd117, fd197;
add.f64 fd777, fd161, fd166;
sub.f64 fd199, fd161, fd166;
mul.f64 fd200, fd199, 0d3FEBB67AE8584CAA;
add.f64 fd201, fd200, fd198;
sub.f64 fd202, fd198, fd200;
add.f64 fd776, fd123, fd777;
mul.f64 fd203, fd777, 0d3FE0000000000000;
sub.f64 fd204, fd123, fd203;
sub.f64 fd205, fd159, fd164;
mul.f64 fd206, fd205, 0d3FEBB67AE8584CAA;
sub.f64 fd207, fd204, fd206;
add.f64 fd208, fd206, fd204;
add.f64 fd209, fd169, fd174;
add.f64 fd210, fd118, fd209;
mul.f64 fd213, fd209, 0d3FE0000000000000;
sub.f64 fd214, fd118, fd213;
add.f64 fd775, fd171, fd176;
sub.f64 fd215, fd171, fd176;
mul.f64 fd216, fd215, 0d3FEBB67AE8584CAA;
add.f64 fd217, fd216, fd214;
sub.f64 fd218, fd214, fd216;
add.f64 fd774, fd124, fd775;
mul.f64 fd219, fd775, 0d3FE0000000000000;
sub.f64 fd220, fd124, fd219;
sub.f64 fd221, fd169, fd174;
mul.f64 fd222, fd221, 0d3FEBB67AE8584CAA;
sub.f64 fd223, fd220, fd222;
add.f64 fd224, fd222, fd220;
add.f64 fd225, %74, %92;
add.f64 fd226, %56, fd225;
mul.f64 fd229, fd225, 0d3FE0000000000000;
sub.f64 fd230, %56, fd229;
add.f64 fd771, %109, %108;
sub.f64 fd231, %109, %108;
mul.f64 fd232, fd231, 0d3FEBB67AE8584CAA;
add.f64 fd233, fd232, fd230;
sub.f64 fd234, fd230, fd232;
add.f64 fd769, %110, fd771;
mul.f64 fd235, fd771, 0d3FE0000000000000;
sub.f64 fd236, %110, fd235;
sub.f64 fd237, %74, %92;
mul.f64 fd238, fd237, 0d3FEBB67AE8584CAA;
sub.f64 fd239, fd236, fd238;
add.f64 fd240, fd238, fd236;
add.f64 fd241, %80, %98;
add.f64 fd242, %62, fd241;
mul.f64 fd245, fd241, 0d3FE0000000000000;
sub.f64 fd246, %62, fd245;
add.f64 fd766, %111, %112;
sub.f64 fd247, %111, %112;
mul.f64 fd248, fd247, 0d3FEBB67AE8584CAA;
add.f64 fd249, fd248, fd246;
sub.f64 fd250, fd246, fd248;
add.f64 fd764, %113, fd766;
mul.f64 fd251, fd766, 0d3FE0000000000000;
sub.f64 fd252, %113, fd251;
sub.f64 fd253, %80, %98;
mul.f64 fd254, fd253, 0d3FEBB67AE8584CAA;
sub.f64 fd255, fd252, fd254;
add.f64 fd256, fd254, fd252;
add.f64 fd257, %86, %104;
add.f64 fd258, %68, fd257;
mul.f64 fd261, fd257, 0d3FE0000000000000;
sub.f64 fd262, %68, fd261;
add.f64 fd761, %115, %114;
sub.f64 fd263, %115, %114;
mul.f64 fd264, fd263, 0d3FEBB67AE8584CAA;
add.f64 fd265, fd264, fd262;
sub.f64 fd266, fd262, fd264;
add.f64 fd759, %116, fd761;
mul.f64 fd267, fd761, 0d3FE0000000000000;
sub.f64 fd268, %116, fd267;
sub.f64 fd269, %86, %104;
mul.f64 fd270, fd269, 0d3FEBB67AE8584CAA;
sub.f64 fd271, fd268, fd270;
add.f64 fd272, fd270, fd268;
mul.f64 fd274, fd255, 0dBFE491B7523C161D;
mul.f64 fd758, fd249, 0d3FE8836FA2CF5039;
sub.f64 fd275, fd758, fd274;
mul.f64 fd276, fd255, 0d3FE8836FA2CF5039;
fma.rn.f64 fd277, fd249, 0dBFE491B7523C161D, fd276;
mul.f64 fd279, fd271, 0dBFEF838B8C811C17;
mul.f64 fd757, fd265, 0d3FC63A1A7E0B738A;
sub.f64 fd280, fd757, fd279;
mul.f64 fd281, fd271, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd282, fd265, 0dBFEF838B8C811C17, fd281;
mul.f64 fd284, fd256, 0dBFEF838B8C811C17;
mul.f64 fd756, fd250, 0d3FC63A1A7E0B738A;
sub.f64 fd285, fd756, fd284;
mul.f64 fd286, fd256, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd287, fd250, 0dBFEF838B8C811C17, fd286;
mul.f64 fd289, fd272, 0dBFD5E3A8748A0BF5;
mul.f64 fd755, fd266, 0dBFEE11F642522D1C;
sub.f64 fd290, fd755, fd289;
mul.f64 fd291, fd272, 0dBFEE11F642522D1C;
fma.rn.f64 fd292, fd266, 0dBFD5E3A8748A0BF5, fd291;
add.f64 fd293, fd242, fd258;
add.f64 fd294, fd226, fd293;
mul.f64 fd297, fd293, 0d3FE0000000000000;
sub.f64 fd298, fd226, fd297;
add.f64 fd754, fd764, fd759;
sub.f64 fd299, fd764, fd759;
mul.f64 fd300, fd299, 0d3FEBB67AE8584CAA;
add.f64 fd301, fd300, fd298;
sub.f64 fd302, fd298, fd300;
add.f64 fd753, fd769, fd754;
mul.f64 fd303, fd754, 0d3FE0000000000000;
sub.f64 fd304, fd769, fd303;
sub.f64 fd305, fd242, fd258;
mul.f64 fd306, fd305, 0d3FEBB67AE8584CAA;
sub.f64 fd307, fd304, fd306;
add.f64 fd308, fd306, fd304;
add.f64 fd309, fd275, fd280;
add.f64 fd310, fd233, fd309;
mul.f64 fd313, fd309, 0d3FE0000000000000;
sub.f64 fd314, fd233, fd313;
add.f64 fd752, fd277, fd282;
sub.f64 fd315, fd277, fd282;
mul.f64 fd316, fd315, 0d3FEBB67AE8584CAA;
add.f64 fd317, fd316, fd314;
sub.f64 fd318, fd314, fd316;
add.f64 fd751, fd239, fd752;
mul.f64 fd319, fd752, 0d3FE0000000000000;
sub.f64 fd320, fd239, fd319;
sub.f64 fd321, fd275, fd280;
mul.f64 fd322, fd321, 0d3FEBB67AE8584CAA;
sub.f64 fd323, fd320, fd322;
add.f64 fd324, fd322, fd320;
add.f64 fd325, fd285, fd290;
add.f64 fd326, fd234, fd325;
mul.f64 fd329, fd325, 0d3FE0000000000000;
sub.f64 fd330, fd234, fd329;
add.f64 fd750, fd287, fd292;
sub.f64 fd331, fd287, fd292;
mul.f64 fd332, fd331, 0d3FEBB67AE8584CAA;
add.f64 fd333, fd332, fd330;
sub.f64 fd334, fd330, fd332;
add.f64 fd749, fd240, fd750;
mul.f64 fd335, fd750, 0d3FE0000000000000;
sub.f64 fd336, fd240, fd335;
sub.f64 fd337, fd285, fd290;
mul.f64 fd338, fd337, 0d3FEBB67AE8584CAA;
sub.f64 fd339, fd336, fd338;
add.f64 fd340, fd338, fd336;
add.f64 fd341, %76, %94;
add.f64 fd342, %58, fd341;
mul.f64 fd345, fd341, 0d3FE0000000000000;
sub.f64 fd346, %58, fd345;
add.f64 fd746, %118, %117;
sub.f64 fd347, %118, %117;
mul.f64 fd348, fd347, 0d3FEBB67AE8584CAA;
add.f64 fd349, fd348, fd346;
sub.f64 fd350, fd346, fd348;
add.f64 fd744, %119, fd746;
mul.f64 fd351, fd746, 0d3FE0000000000000;
sub.f64 fd352, %119, fd351;
sub.f64 fd353, %76, %94;
mul.f64 fd354, fd353, 0d3FEBB67AE8584CAA;
sub.f64 fd355, fd352, fd354;
add.f64 fd356, fd354, fd352;
add.f64 fd357, %82, %100;
add.f64 fd358, %64, fd357;
mul.f64 fd361, fd357, 0d3FE0000000000000;
sub.f64 fd362, %64, fd361;
add.f64 fd741, %121, %120;
sub.f64 fd363, %121, %120;
mul.f64 fd364, fd363, 0d3FEBB67AE8584CAA;
add.f64 fd365, fd364, fd362;
sub.f64 fd366, fd362, fd364;
add.f64 fd739, %122, fd741;
mul.f64 fd367, fd741, 0d3FE0000000000000;
sub.f64 fd368, %122, fd367;
sub.f64 fd369, %82, %100;
mul.f64 fd370, fd369, 0d3FEBB67AE8584CAA;
sub.f64 fd371, fd368, fd370;
add.f64 fd372, fd370, fd368;
add.f64 fd373, %88, %106;
add.f64 fd374, %70, fd373;
mul.f64 fd377, fd373, 0d3FE0000000000000;
sub.f64 fd378, %70, fd377;
add.f64 fd737, %123, %107;
sub.f64 fd379, %123, %107;
mul.f64 fd380, fd379, 0d3FEBB67AE8584CAA;
add.f64 fd381, fd380, fd378;
sub.f64 fd382, fd378, fd380;
add.f64 fd735, %124, fd737;
mul.f64 fd383, fd737, 0d3FE0000000000000;
sub.f64 fd384, %124, fd383;
sub.f64 fd385, %88, %106;
mul.f64 fd386, fd385, 0d3FEBB67AE8584CAA;
sub.f64 fd387, fd384, fd386;
add.f64 fd388, fd386, fd384;
mul.f64 fd390, fd371, 0dBFE491B7523C161D;
mul.f64 fd734, fd365, 0d3FE8836FA2CF5039;
sub.f64 fd391, fd734, fd390;
mul.f64 fd392, fd371, 0d3FE8836FA2CF5039;
fma.rn.f64 fd393, fd365, 0dBFE491B7523C161D, fd392;
mul.f64 fd395, fd387, 0dBFEF838B8C811C17;
mul.f64 fd733, fd381, 0d3FC63A1A7E0B738A;
sub.f64 fd396, fd733, fd395;
mul.f64 fd397, fd387, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd398, fd381, 0dBFEF838B8C811C17, fd397;
mul.f64 fd400, fd372, 0dBFEF838B8C811C17;
mul.f64 fd732, fd366, 0d3FC63A1A7E0B738A;
sub.f64 fd401, fd732, fd400;
mul.f64 fd402, fd372, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd403, fd366, 0dBFEF838B8C811C17, fd402;
mul.f64 fd405, fd388, 0dBFD5E3A8748A0BF5;
mul.f64 fd731, fd382, 0dBFEE11F642522D1C;
sub.f64 fd406, fd731, fd405;
mul.f64 fd407, fd388, 0dBFEE11F642522D1C;
fma.rn.f64 fd408, fd382, 0dBFD5E3A8748A0BF5, fd407;
add.f64 fd409, fd358, fd374;
add.f64 fd410, fd342, fd409;
mul.f64 fd413, fd409, 0d3FE0000000000000;
sub.f64 fd414, fd342, fd413;
add.f64 fd730, fd739, fd735;
sub.f64 fd415, fd739, fd735;
mul.f64 fd416, fd415, 0d3FEBB67AE8584CAA;
add.f64 fd417, fd416, fd414;
sub.f64 fd418, fd414, fd416;
add.f64 fd729, fd744, fd730;
mul.f64 fd419, fd730, 0d3FE0000000000000;
sub.f64 fd420, fd744, fd419;
sub.f64 fd421, fd358, fd374;
mul.f64 fd422, fd421, 0d3FEBB67AE8584CAA;
sub.f64 fd423, fd420, fd422;
add.f64 fd424, fd422, fd420;
add.f64 fd425, fd391, fd396;
add.f64 fd426, fd349, fd425;
mul.f64 fd429, fd425, 0d3FE0000000000000;
sub.f64 fd430, fd349, fd429;
add.f64 fd728, fd393, fd398;
sub.f64 fd431, fd393, fd398;
mul.f64 fd432, fd431, 0d3FEBB67AE8584CAA;
add.f64 fd433, fd432, fd430;
sub.f64 fd434, fd430, fd432;
add.f64 fd727, fd355, fd728;
mul.f64 fd435, fd728, 0d3FE0000000000000;
sub.f64 fd436, fd355, fd435;
sub.f64 fd437, fd391, fd396;
mul.f64 fd438, fd437, 0d3FEBB67AE8584CAA;
sub.f64 fd439, fd436, fd438;
add.f64 fd440, fd438, fd436;
add.f64 fd441, fd401, fd406;
add.f64 fd442, fd350, fd441;
mul.f64 fd445, fd441, 0d3FE0000000000000;
sub.f64 fd446, fd350, fd445;
add.f64 fd726, fd403, fd408;
sub.f64 fd447, fd403, fd408;
mul.f64 fd448, fd447, 0d3FEBB67AE8584CAA;
add.f64 fd449, fd448, fd446;
sub.f64 fd450, fd446, fd448;
add.f64 fd725, fd356, fd726;
mul.f64 fd451, fd726, 0d3FE0000000000000;
sub.f64 fd452, fd356, fd451;
sub.f64 fd453, fd401, fd406;
mul.f64 fd454, fd453, 0d3FEBB67AE8584CAA;
sub.f64 fd455, fd452, fd454;
add.f64 fd456, fd454, fd452;
mul.f64 fd723, fd310, 0d3FEF232EFF15C9E6;
mul.f64 fd724, fd751, 0dBFCD84D223638000;
sub.f64 fd459, fd723, fd724;
mul.f64 fd460, fd751, 0d3FEF232EFF15C9E6;
fma.rn.f64 fd461, fd310, 0dBFCD84D223638000, fd460;
mul.f64 fd721, fd426, 0d3FEC98A37A9A7850;
mul.f64 fd722, fd727, 0dBFDCB920325BAFA6;
sub.f64 fd464, fd721, fd722;
mul.f64 fd465, fd727, 0d3FEC98A37A9A7850;
fma.rn.f64 fd466, fd426, 0dBFDCB920325BAFA6, fd465;
mul.f64 fd468, fd749, 0dBFDCB920325BAFA6;
mul.f64 fd720, fd326, 0d3FEC98A37A9A7850;
sub.f64 fd469, fd720, fd468;
mul.f64 fd470, fd749, 0d3FEC98A37A9A7850;
fma.rn.f64 fd471, fd326, 0dBFDCB920325BAFA6, fd470;
mul.f64 fd473, fd725, 0dBFE9AAFE4207DF5F;
mul.f64 fd719, fd442, 0d3FE31BEC55BC71BC;
sub.f64 fd474, fd719, fd473;
mul.f64 fd475, fd725, 0d3FE31BEC55BC71BC;
fma.rn.f64 fd476, fd442, 0dBFE9AAFE4207DF5F, fd475;
mul.f64 fd478, fd307, 0dBFE491B7523C161D;
mul.f64 fd718, fd301, 0d3FE8836FA2CF5039;
sub.f64 fd479, fd718, fd478;
mul.f64 fd480, fd307, 0d3FE8836FA2CF5039;
fma.rn.f64 fd481, fd301, 0dBFE491B7523C161D, fd480;
mul.f64 fd483, fd423, 0dBFEF838B8C811C17;
mul.f64 fd717, fd417, 0d3FC63A1A7E0B738A;
sub.f64 fd484, fd717, fd483;
mul.f64 fd485, fd423, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd486, fd417, 0dBFEF838B8C811C17, fd485;
mul.f64 fd488, fd323, 0dBFE9AAFE4207DF5F;
mul.f64 fd716, fd317, 0d3FE31BEC55BC71BC;
sub.f64 fd489, fd716, fd488;
mul.f64 fd490, fd323, 0d3FE31BEC55BC71BC;
fma.rn.f64 fd491, fd317, 0dBFE9AAFE4207DF5F, fd490;
mul.f64 fd493, fd439, 0dBFEEA7D99F29CADE;
mul.f64 fd715, fd433, 0dBFD25AFBF23865BF;
sub.f64 fd494, fd715, fd493;
mul.f64 fd495, fd439, 0dBFD25AFBF23865BF;
fma.rn.f64 fd496, fd433, 0dBFEEA7D99F29CADE, fd495;
mul.f64 fd713, fd333, 0d3FD9595EF26FB670;
mul.f64 fd714, fd339, 0dBFED6206BEB6C24B;
sub.f64 fd499, fd713, fd714;
mul.f64 fd500, fd339, 0d3FD9595EF26FB670;
fma.rn.f64 fd501, fd333, 0dBFED6206BEB6C24B, fd500;
mul.f64 fd711, fd449, 0dBFE5F5B105F99707;
mul.f64 fd712, fd455, 0dBFE746A51650EADE;
sub.f64 fd504, fd711, fd712;
mul.f64 fd505, fd455, 0dBFE5F5B105F99707;
fma.rn.f64 fd506, fd449, 0dBFE746A51650EADE, fd505;
mul.f64 fd709, fd302, 0d3FC63A1A7E0B738A;
mul.f64 fd710, fd308, 0dBFEF838B8C811C17;
sub.f64 fd509, fd709, fd710;
mul.f64 fd510, fd308, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd511, fd302, 0dBFEF838B8C811C17, fd510;
mul.f64 fd707, fd418, 0dBFEE11F642522D1C;
mul.f64 fd708, fd424, 0dBFD5E3A8748A0BF5;
sub.f64 fd514, fd707, fd708;
mul.f64 fd515, fd424, 0dBFEE11F642522D1C;
fma.rn.f64 fd516, fd418, 0dBFD5E3A8748A0BF5, fd515;
mul.f64 fd518, fd324, 0dBFEFF223F3635CE3;
mul.f64 fd706, fd318, 0dBFADC528B5343A86;
sub.f64 fd519, fd706, fd518;
mul.f64 fd520, fd324, 0dBFADC528B5343A86;
fma.rn.f64 fd521, fd318, 0dBFEFF223F3635CE3, fd520;
mul.f64 fd523, fd440, 0d3FBDB843E577175E;
mul.f64 fd705, fd434, 0dBFEFC89BCEF44CF4;
sub.f64 fd524, fd705, fd523;
mul.f64 fd525, fd440, 0dBFEFC89BCEF44CF4;
fma.rn.f64 fd526, fd434, 0d3FBDB843E577175E, fd525;
mul.f64 fd528, fd340, 0dBFEEA7D99F29CADE;
mul.f64 fd704, fd334, 0dBFD25AFBF23865BF;
sub.f64 fd529, fd704, fd528;
mul.f64 fd530, fd340, 0dBFD25AFBF23865BF;
fma.rn.f64 fd531, fd334, 0dBFEEA7D99F29CADE, fd530;
mul.f64 fd533, fd456, 0d3FE19593DA358510;
mul.f64 fd703, fd450, 0dBFEABC50EF4734A7;
sub.f64 fd534, fd703, fd533;
mul.f64 fd535, fd456, 0dBFEABC50EF4734A7;
fma.rn.f64 fd536, fd450, 0d3FE19593DA358510, fd535;
add.f64 fd537, fd294, fd410;
mul.f64 fd539, fd537, 0d3FE0000000000000;
sub.f64 fd540, fd178, fd539;
add.f64 fd702, fd753, fd729;
sub.f64 fd541, fd753, fd729;
mul.f64 fd542, fd541, 0d3FEBB67AE8584CAA;
mul.f64 fd543, fd702, 0d3FE0000000000000;
sub.f64 fd544, fd778, fd543;
sub.f64 fd545, fd294, fd410;
mul.f64 fd546, fd545, 0d3FEBB67AE8584CAA;
add.f64 fd547, fd459, fd464;
mul.f64 fd549, fd547, 0d3FE0000000000000;
sub.f64 fd550, fd194, fd549;
add.f64 fd701, fd461, fd466;
sub.f64 fd551, fd461, fd466;
mul.f64 fd552, fd551, 0d3FEBB67AE8584CAA;
mul.f64 fd553, fd701, 0d3FE0000000000000;
sub.f64 fd554, fd776, fd553;
sub.f64 fd555, fd459, fd464;
mul.f64 fd556, fd555, 0d3FEBB67AE8584CAA;
add.f64 fd557, fd469, fd474;
mul.f64 fd559, fd557, 0d3FE0000000000000;
sub.f64 fd560, fd210, fd559;
add.f64 fd700, fd471, fd476;
sub.f64 fd561, fd471, fd476;
mul.f64 fd562, fd561, 0d3FEBB67AE8584CAA;
mul.f64 fd563, fd700, 0d3FE0000000000000;
sub.f64 fd564, fd774, fd563;
sub.f64 fd565, fd469, fd474;
mul.f64 fd566, fd565, 0d3FEBB67AE8584CAA;
add.f64 fd567, fd479, fd484;
mul.f64 fd569, fd567, 0d3FE0000000000000;
sub.f64 fd570, fd185, fd569;
add.f64 fd699, fd481, fd486;
sub.f64 fd571, fd481, fd486;
mul.f64 fd572, fd571, 0d3FEBB67AE8584CAA;
mul.f64 fd573, fd699, 0d3FE0000000000000;
sub.f64 fd574, fd191, fd573;
sub.f64 fd575, fd479, fd484;
mul.f64 fd576, fd575, 0d3FEBB67AE8584CAA;
add.f64 fd577, fd489, fd494;
mul.f64 fd579, fd577, 0d3FE0000000000000;
sub.f64 fd580, fd201, fd579;
add.f64 fd698, fd491, fd496;
sub.f64 fd581, fd491, fd496;
mul.f64 fd582, fd581, 0d3FEBB67AE8584CAA;
mul.f64 fd583, fd698, 0d3FE0000000000000;
sub.f64 fd584, fd207, fd583;
sub.f64 fd585, fd489, fd494;
mul.f64 fd586, fd585, 0d3FEBB67AE8584CAA;
add.f64 fd587, fd499, fd504;
mul.f64 fd589, fd587, 0d3FE0000000000000;
sub.f64 fd590, fd217, fd589;
add.f64 fd697, fd501, fd506;
sub.f64 fd591, fd501, fd506;
mul.f64 fd592, fd591, 0d3FEBB67AE8584CAA;
mul.f64 fd593, fd697, 0d3FE0000000000000;
sub.f64 fd594, fd223, fd593;
sub.f64 fd595, fd499, fd504;
mul.f64 fd596, fd595, 0d3FEBB67AE8584CAA;
add.f64 fd597, fd509, fd514;
mul.f64 fd599, fd597, 0d3FE0000000000000;
sub.f64 fd600, fd186, fd599;
add.f64 fd696, fd511, fd516;
sub.f64 fd601, fd511, fd516;
mul.f64 fd602, fd601, 0d3FEBB67AE8584CAA;
mul.f64 fd603, fd696, 0d3FE0000000000000;
sub.f64 fd604, fd192, fd603;
sub.f64 fd605, fd509, fd514;
mul.f64 fd606, fd605, 0d3FEBB67AE8584CAA;
add.f64 fd607, fd519, fd524;
mul.f64 fd609, fd607, 0d3FE0000000000000;
sub.f64 fd610, fd202, fd609;
add.f64 fd695, fd521, fd526;
sub.f64 fd611, fd521, fd526;
mul.f64 fd612, fd611, 0d3FEBB67AE8584CAA;
mul.f64 fd613, fd695, 0d3FE0000000000000;
sub.f64 fd614, fd208, fd613;
sub.f64 fd615, fd519, fd524;
mul.f64 fd616, fd615, 0d3FEBB67AE8584CAA;
add.f64 fd617, fd529, fd534;
mul.f64 fd619, fd617, 0d3FE0000000000000;
sub.f64 fd620, fd218, fd619;
add.f64 fd694, fd531, fd536;
sub.f64 fd621, fd531, fd536;
mul.f64 fd622, fd621, 0d3FEBB67AE8584CAA;
mul.f64 fd623, fd694, 0d3FE0000000000000;
sub.f64 fd624, fd224, fd623;
sub.f64 fd625, fd529, fd534;
mul.f64 fd791, fd700, 0d3FE0000000000000;
sub.f64 fd790, fd774, fd791;
mul.f64 fd626, fd625, 0d3FEBB67AE8584CAA;
add.f64 %1, fd778, fd702;
mul.f64 fd793, fd537, 0d3FE0000000000000;
sub.f64 fd792, fd178, fd793;
add.f64 %0, fd178, fd537;
mul.f64 fd795, fd701, 0d3FE0000000000000;
sub.f64 fd794, fd776, fd795;
add.f64 %3, fd776, fd701;
add.f64 %2, fd194, fd547;
add.f64 %5, fd774, fd700;
add.f64 %4, fd210, fd557;
add.f64 %7, fd191, fd699;
add.f64 %6, fd185, fd567;
add.f64 %9, fd207, fd698;
add.f64 %8, fd201, fd577;
add.f64 %11, fd223, fd697;
add.f64 %10, fd217, fd587;
add.f64 %13, fd192, fd696;
add.f64 %12, fd186, fd597;
add.f64 %15, fd208, fd695;
add.f64 %14, fd202, fd607;
add.f64 %17, fd224, fd694;
add.f64 %16, fd218, fd617;
add.f64 %18, fd542, fd792;
sub.f64 %19, fd544, fd546;
sub.f64 %21, fd794, fd556;
add.f64 %20, fd552, fd550;
sub.f64 %23, fd790, fd566;
add.f64 %22, fd562, fd560;
sub.f64 %25, fd574, fd576;
add.f64 %24, fd572, fd570;
add.f64 %26, fd582, fd580;
sub.f64 %27, fd584, fd586;
add.f64 %28, fd592, fd590;
sub.f64 %29, fd594, fd596;
add.f64 %30, fd602, fd600;
sub.f64 %31, fd604, fd606;
add.f64 %32, fd612, fd610;
sub.f64 %33, fd614, fd616;
sub.f64 %35, fd624, fd626;
add.f64 %34, fd622, fd620;
add.f64 %37, fd546, fd544;
sub.f64 %36, fd792, fd542;
add.f64 %39, fd556, fd794;
sub.f64 %38, fd550, fd552;
add.f64 %41, fd566, fd790;
sub.f64 %40, fd560, fd562;
add.f64 %43, fd576, fd574;
sub.f64 %42, fd570, fd572;
add.f64 %45, fd586, fd584;
sub.f64 %44, fd580, fd582;
add.f64 %47, fd596, fd594;
sub.f64 %46, fd590, fd592;
add.f64 %49, fd606, fd604;
sub.f64 %48, fd600, fd602;
add.f64 %51, fd616, fd614;
sub.f64 %50, fd610, fd612;
add.f64 %53, fd626, fd624;
sub.f64 %52, fd620, fd622;
})"
     // Operand lists: 54 outputs (%0..%53), then 71 inputs (%54..%124). The
     // last 17 inputs re-bind selected rmem[k].y values, so the positional
     // numbering used by the PTX depends on the exact order below.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y), "=d"(rmem[5].x), "=d"(rmem[5].y), "=d"(rmem[6].x), "=d"(rmem[6].y), "=d"(rmem[7].x), "=d"(rmem[7].y), "=d"(rmem[8].x), "=d"(rmem[8].y), "=d"(rmem[9].x), "=d"(rmem[9].y), "=d"(rmem[10].x), "=d"(rmem[10].y), "=d"(rmem[11].x), "=d"(rmem[11].y), "=d"(rmem[12].x), "=d"(rmem[12].y), "=d"(rmem[13].x), "=d"(rmem[13].y), "=d"(rmem[14].x), "=d"(rmem[14].y), "=d"(rmem[15].x), "=d"(rmem[15].y), "=d"(rmem[16].x), "=d"(rmem[16].y), "=d"(rmem[17].x), "=d"(rmem[17].y), "=d"(rmem[18].x), "=d"(rmem[18].y), "=d"(rmem[19].x), "=d"(rmem[19].y), "=d"(rmem[20].x), "=d"(rmem[20].y), "=d"(rmem[21].x), "=d"(rmem[21].y), "=d"(rmem[22].x), "=d"(rmem[22].y), "=d"(rmem[23].x), "=d"(rmem[23].y), "=d"(rmem[24].x), "=d"(rmem[24].y), "=d"(rmem[25].x), "=d"(rmem[25].y), "=d"(rmem[26].x), "=d"(rmem[26].y): "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y), "d"(rmem[5].x), "d"(rmem[5].y), "d"(rmem[6].x), "d"(rmem[6].y), "d"(rmem[7].x), "d"(rmem[7].y), "d"(rmem[8].x), "d"(rmem[8].y), "d"(rmem[9].x), "d"(rmem[9].y), "d"(rmem[10].x), "d"(rmem[10].y), "d"(rmem[11].x), "d"(rmem[11].y), "d"(rmem[12].x), "d"(rmem[12].y), "d"(rmem[13].x), "d"(rmem[13].y), "d"(rmem[14].x), "d"(rmem[14].y), "d"(rmem[15].x), "d"(rmem[15].y), "d"(rmem[16].x), "d"(rmem[16].y), "d"(rmem[17].x), "d"(rmem[17].y), "d"(rmem[18].x), "d"(rmem[18].y), "d"(rmem[19].x), "d"(rmem[19].y), "d"(rmem[20].x), "d"(rmem[20].y), "d"(rmem[21].x), "d"(rmem[21].y), "d"(rmem[22].x), "d"(rmem[22].y), "d"(rmem[23].x), "d"(rmem[23].y), "d"(rmem[24].x), "d"(rmem[24].y), "d"(rmem[25].x), "d"(rmem[25].y), "d"(rmem[26].x), "d"(rmem[26].y), "d"(rmem[19].y), "d"(rmem[10].y), "d"(rmem[1].y), "d"(rmem[13].y), "d"(rmem[22].y), "d"(rmem[4].y), "d"(rmem[25].y), "d"(rmem[16].y), 
"d"(rmem[7].y), "d"(rmem[20].y), "d"(rmem[11].y), "d"(rmem[2].y), "d"(rmem[23].y), "d"(rmem[14].y), "d"(rmem[5].y), "d"(rmem[17].y), "d"(rmem[8].y));
};




// Specialization of cufftdx_private_function for <506, double, 1>.
//
// Works on the 9 complex values rmem[0..8] (read as inputs, all overwritten
// with results) and exchanges intermediate values between threads through
// shared memory.
//
// Operand map:
//   %0..%17   outputs: rmem[k].x is %(2k), rmem[k].y is %(2k+1).
//   %18       smem, used as a 32-bit shared-memory base address.
//   %19       lut_dp_9_27 (declared outside this block), used as the base
//             address for ld.global.
//   %20..%42  inputs from rmem[0..8]. The .y members of elements 1, 2, 4, 5
//             and 7 are bound twice, so the numbering is not a plain 2k
//             pattern (e.g. rmem[1].y is both %23 and %24; the PTX reads %24).
//
// Steps, in PTX order:
//   1. Register-only butterflies on the inputs using 0.5 (0d3FE0000000000000)
//      and ~0.8660254 (0d3FEBB67AE8584CAA, sqrt(3)/2), plus rotations by fixed
//      constants.
//   2. tid.x is split into q = tid.x / 3 (multiply by 0xAAAAAAAB, written as
//      -1431655765, then shift right by 33) and m = tid.x - 3 * q.
//   3. Two complex factors are loaded from lut_dp_9_27 at byte offsets m * 16
//      and m * 16 + 48. Further factors are built from them by complex
//      multiplication, and eight of the nine intermediate values are
//      multiplied by these factors (the first is stored unscaled).
//   4. barrier.sync 0, then nine v2.f64 stores to
//      smem + tid.y * 432 + q * 432 + m * 144 + {0, 16, ..., 128}.
//   5. barrier.sync 0, then nine v2.f64 loads from
//      smem + tid.y * 432 + q * 432 + m * 16 + {0, 48, ..., 384}.
//   6. A final butterfly stage writes rmem[0..8].
//
// 432 bytes is 27 v2.f64 slots of 16 bytes and 144 bytes is 9 such slots, so
// each 432-byte region holds what three threads (m = 0, 1, 2) store -- this is
// presumably one 27-point FFT shared by three threads; TODO confirm.
//
// The function contains two barrier.sync 0 instructions, so callers presumably
// have to invoke it from all threads that participate in barrier 0 -- verify
// against the call sites.
//
// NOTE(review): the tid.y term and the q = tid.x / 3 term both use a 432-byte
// stride, so the regions are distinct only if at most one of them is nonzero
// for any block -- presumably guaranteed by the launch configuration; confirm.
template<> __forceinline__ __device__ void cufftdx_private_function<506, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<12>;
.reg .f64 fd<315>;
.reg .b64 rd<7>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 432, r2;
add.f64 fd37, %28, %36;
add.f64 fd38, %20, fd37;
add.f64 fd39, %29, %37;
add.f64 fd40, %21, fd39;
mul.f64 fd41, fd37, 0d3FE0000000000000;
sub.f64 fd42, %20, fd41;
sub.f64 fd43, %29, %37;
mul.f64 fd44, fd43, 0d3FEBB67AE8584CAA;
add.f64 fd45, fd44, fd42;
sub.f64 fd46, fd42, fd44;
mul.f64 fd47, fd39, 0d3FE0000000000000;
sub.f64 fd48, %21, fd47;
sub.f64 fd49, %28, %36;
mul.f64 fd50, fd49, 0d3FEBB67AE8584CAA;
sub.f64 fd51, fd48, fd50;
add.f64 fd52, fd50, fd48;
add.f64 fd53, %30, %38;
add.f64 fd54, %22, fd53;
add.f64 fd55, %32, %40;
add.f64 fd56, %24, fd55;
mul.f64 fd57, fd53, 0d3FE0000000000000;
sub.f64 fd58, %22, fd57;
sub.f64 fd59, %32, %40;
mul.f64 fd60, fd59, 0d3FEBB67AE8584CAA;
add.f64 fd61, fd60, fd58;
sub.f64 fd62, fd58, fd60;
mul.f64 fd63, fd55, 0d3FE0000000000000;
sub.f64 fd64, %24, fd63;
sub.f64 fd65, %30, %38;
mul.f64 fd66, fd65, 0d3FEBB67AE8584CAA;
sub.f64 fd67, fd64, fd66;
add.f64 fd68, fd66, fd64;
add.f64 fd69, %33, %41;
add.f64 fd70, %25, fd69;
add.f64 fd71, %35, %42;
add.f64 fd72, %27, fd71;
mul.f64 fd73, fd69, 0d3FE0000000000000;
sub.f64 fd74, %25, fd73;
sub.f64 fd75, %35, %42;
mul.f64 fd76, fd75, 0d3FEBB67AE8584CAA;
add.f64 fd77, fd76, fd74;
sub.f64 fd78, fd74, fd76;
mul.f64 fd79, fd71, 0d3FE0000000000000;
sub.f64 fd80, %27, fd79;
sub.f64 fd81, %33, %41;
mul.f64 fd82, fd81, 0d3FEBB67AE8584CAA;
sub.f64 fd83, fd80, fd82;
add.f64 fd84, fd82, fd80;
mov.u32 r4, %tid.x;
mul.f64 fd85, fd61, 0d3FE8836FA2CF5039;
mul.f64 fd86, fd67, 0dBFE491B7523C161D;
sub.f64 fd87, fd85, fd86;
mul.f64 fd88, fd67, 0d3FE8836FA2CF5039;
fma.rn.f64 fd89, fd61, 0dBFE491B7523C161D, fd88;
mul.f64 fd90, fd77, 0d3FC63A1A7E0B738A;
mul.f64 fd91, fd83, 0dBFEF838B8C811C17;
sub.f64 fd92, fd90, fd91;
mul.f64 fd93, fd83, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd94, fd77, 0dBFEF838B8C811C17, fd93;
mul.f64 fd95, fd62, 0d3FC63A1A7E0B738A;
mul.f64 fd96, fd68, 0dBFEF838B8C811C17;
sub.f64 fd97, fd95, fd96;
mul.f64 fd98, fd68, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd99, fd62, 0dBFEF838B8C811C17, fd98;
mul.f64 fd100, fd78, 0dBFEE11F642522D1C;
mul.f64 fd101, fd84, 0dBFD5E3A8748A0BF5;
sub.f64 fd102, fd100, fd101;
mul.f64 fd103, fd84, 0dBFEE11F642522D1C;
fma.rn.f64 fd104, fd78, 0dBFD5E3A8748A0BF5, fd103;
add.f64 fd105, fd54, fd70;
add.f64 fd106, fd56, fd72;
mul.f64 fd107, fd105, 0d3FE0000000000000;
sub.f64 fd108, fd38, fd107;
sub.f64 fd109, fd56, fd72;
mul.f64 fd110, fd109, 0d3FEBB67AE8584CAA;
add.f64 fd111, fd110, fd108;
sub.f64 fd112, fd108, fd110;
mul.f64 fd113, fd106, 0d3FE0000000000000;
sub.f64 fd114, fd40, fd113;
sub.f64 fd115, fd54, fd70;
mul.f64 fd116, fd115, 0d3FEBB67AE8584CAA;
sub.f64 fd117, fd114, fd116;
add.f64 fd118, fd116, fd114;
add.f64 fd119, fd87, fd92;
add.f64 fd120, fd45, fd119;
add.f64 fd121, fd89, fd94;
add.f64 fd122, fd51, fd121;
mul.f64 fd123, fd119, 0d3FE0000000000000;
sub.f64 fd124, fd45, fd123;
sub.f64 fd125, fd89, fd94;
mul.f64 fd126, fd125, 0d3FEBB67AE8584CAA;
add.f64 fd127, fd126, fd124;
sub.f64 fd128, fd124, fd126;
mul.f64 fd129, fd121, 0d3FE0000000000000;
sub.f64 fd130, fd51, fd129;
sub.f64 fd131, fd87, fd92;
mul.f64 fd132, fd131, 0d3FEBB67AE8584CAA;
sub.f64 fd133, fd130, fd132;
add.f64 fd134, fd132, fd130;
add.f64 fd135, fd97, fd102;
add.f64 fd136, fd46, fd135;
add.f64 fd137, fd99, fd104;
add.f64 fd138, fd52, fd137;
mul.f64 fd139, fd135, 0d3FE0000000000000;
sub.f64 fd140, fd46, fd139;
sub.f64 fd141, fd99, fd104;
mul.f64 fd142, fd141, 0d3FEBB67AE8584CAA;
add.f64 fd143, fd142, fd140;
sub.f64 fd144, fd140, fd142;
mul.f64 fd145, fd137, 0d3FE0000000000000;
sub.f64 fd146, fd52, fd145;
sub.f64 fd147, fd97, fd102;
mul.f64 fd148, fd147, 0d3FEBB67AE8584CAA;
sub.f64 fd149, fd146, fd148;
add.f64 fd150, fd148, fd146;
mul.wide.u32 rd2, r4, -1431655765;
shr.u64 rd3, rd2, 33;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 3;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 432, r3;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd151, fd152}, [rd6];
mul.f64 fd155, fd151, fd120;
mul.f64 fd156, fd152, fd122;
mul.f64 fd157, fd151, fd122;
mul.f64 fd158, fd151, fd151;
mul.f64 fd159, fd152, fd152;
sub.f64 fd160, fd158, fd159;
mul.f64 fd161, fd152, fd151;
fma.rn.f64 fd162, fd152, fd151, fd161;
mul.f64 fd163, fd160, fd136;
mul.f64 fd164, fd162, fd138;
mul.f64 fd165, fd160, fd138;
mul.f64 fd166, fd151, fd160;
mul.f64 fd167, fd152, fd162;
sub.f64 fd168, fd166, fd167;
mul.f64 fd169, fd151, fd162;
fma.rn.f64 fd170, fd152, fd160, fd169;
mul.f64 fd171, fd168, fd111;
mul.f64 fd172, fd170, fd117;
mul.f64 fd173, fd168, fd117;
mul.f64 fd174, fd151, fd168;
mul.f64 fd175, fd152, fd170;
sub.f64 fd176, fd174, fd175;
mul.f64 fd177, fd151, fd170;
fma.rn.f64 fd178, fd152, fd168, fd177;
mul.f64 fd179, fd176, fd127;
mul.f64 fd180, fd178, fd133;
mul.f64 fd181, fd176, fd133;
ld.global.v2.f64 {fd182, fd183}, [rd6+48];
mul.f64 fd186, fd182, fd143;
mul.f64 fd187, fd183, fd149;
mul.f64 fd188, fd182, fd149;
mul.f64 fd189, fd151, fd182;
mul.f64 fd190, fd152, fd183;
sub.f64 fd191, fd189, fd190;
mul.f64 fd192, fd151, fd183;
fma.rn.f64 fd193, fd152, fd182, fd192;
mul.f64 fd194, fd191, fd112;
mul.f64 fd195, fd193, fd118;
mul.f64 fd196, fd191, fd118;
mul.f64 fd197, fd151, fd191;
mul.f64 fd198, fd152, fd193;
sub.f64 fd199, fd197, fd198;
mul.f64 fd200, fd151, fd193;
fma.rn.f64 fd201, fd152, fd191, fd200;
mul.f64 fd202, fd199, fd128;
mul.f64 fd203, fd201, fd134;
mul.f64 fd204, fd199, fd134;
mul.f64 fd205, fd151, fd199;
mul.f64 fd206, fd152, fd201;
sub.f64 fd207, fd205, fd206;
mul.f64 fd208, fd151, fd201;
fma.rn.f64 fd209, fd152, fd199, fd208;
mul.f64 fd210, fd207, fd144;
mul.f64 fd211, fd209, fd150;
mul.f64 fd212, fd207, fd150;
barrier.sync 0;
mad.lo.s32 r9, r7, 144, r8;
add.f64 fd213, fd40, fd106;
add.f64 fd214, fd38, fd105;
st.shared.v2.f64 [r9], {fd214, fd213};
fma.rn.f64 fd215, fd152, fd120, fd157;
sub.f64 fd216, fd155, fd156;
st.shared.v2.f64 [r9+16], {fd216, fd215};
fma.rn.f64 fd217, fd162, fd136, fd165;
sub.f64 fd218, fd163, fd164;
st.shared.v2.f64 [r9+32], {fd218, fd217};
sub.f64 fd219, fd171, fd172;
fma.rn.f64 fd220, fd170, fd111, fd173;
st.shared.v2.f64 [r9+48], {fd219, fd220};
fma.rn.f64 fd221, fd178, fd127, fd181;
sub.f64 fd222, fd179, fd180;
st.shared.v2.f64 [r9+64], {fd222, fd221};
fma.rn.f64 fd223, fd183, fd143, fd188;
sub.f64 fd224, fd186, fd187;
st.shared.v2.f64 [r9+80], {fd224, fd223};
fma.rn.f64 fd225, fd193, fd112, fd196;
sub.f64 fd226, fd194, fd195;
st.shared.v2.f64 [r9+96], {fd226, fd225};
fma.rn.f64 fd227, fd201, fd128, fd204;
sub.f64 fd228, fd202, fd203;
st.shared.v2.f64 [r9+112], {fd228, fd227};
fma.rn.f64 fd229, fd209, fd144, fd212;
sub.f64 fd230, fd210, fd211;
st.shared.v2.f64 [r9+128], {fd230, fd229};
barrier.sync 0;
shl.b32 r10, r7, 7;
sub.s32 r11, r9, r10;
ld.shared.v2.f64 {fd231, fd232}, [r11];
ld.shared.v2.f64 {fd235, fd236}, [r11+48];
ld.shared.v2.f64 {fd239, fd240}, [r11+96];
ld.shared.v2.f64 {fd243, fd244}, [r11+144];
ld.shared.v2.f64 {fd247, fd248}, [r11+192];
ld.shared.v2.f64 {fd251, fd252}, [r11+240];
ld.shared.v2.f64 {fd255, fd256}, [r11+288];
ld.shared.v2.f64 {fd259, fd260}, [r11+336];
ld.shared.v2.f64 {fd263, fd264}, [r11+384];
add.f64 fd267, fd243, fd255;
add.f64 fd268, fd244, fd256;
mul.f64 fd269, fd267, 0d3FE0000000000000;
sub.f64 fd270, fd231, fd269;
sub.f64 fd271, fd244, fd256;
mul.f64 fd272, fd271, 0d3FEBB67AE8584CAA;
mul.f64 fd273, fd268, 0d3FE0000000000000;
sub.f64 fd274, fd232, fd273;
sub.f64 fd275, fd243, fd255;
mul.f64 fd276, fd275, 0d3FEBB67AE8584CAA;
add.f64 fd277, fd247, fd259;
add.f64 fd278, fd248, fd260;
mul.f64 fd279, fd277, 0d3FE0000000000000;
sub.f64 fd280, fd235, fd279;
sub.f64 fd281, fd248, fd260;
mul.f64 fd282, fd281, 0d3FEBB67AE8584CAA;
mul.f64 fd283, fd278, 0d3FE0000000000000;
sub.f64 fd284, fd236, fd283;
sub.f64 fd285, fd247, fd259;
mul.f64 fd286, fd285, 0d3FEBB67AE8584CAA;
add.f64 fd287, fd251, fd263;
add.f64 fd288, fd252, fd264;
mul.f64 fd289, fd287, 0d3FE0000000000000;
sub.f64 fd290, fd239, fd289;
sub.f64 fd291, fd252, fd264;
mul.f64 fd292, fd291, 0d3FEBB67AE8584CAA;
mul.f64 fd293, fd288, 0d3FE0000000000000;
sub.f64 fd294, fd240, fd293;
sub.f64 fd295, fd251, fd263;
mul.f64 fd296, fd295, 0d3FEBB67AE8584CAA;
add.f64 %1, fd232, fd268;
add.f64 %0, fd231, fd267;
add.f64 %3, fd236, fd278;
add.f64 %2, fd235, fd277;
add.f64 %5, fd240, fd288;
add.f64 %4, fd239, fd287;
sub.f64 %7, fd274, fd276;
add.f64 %6, fd272, fd270;
sub.f64 %9, fd284, fd286;
add.f64 %8, fd282, fd280;
sub.f64 %11, fd294, fd296;
add.f64 %10, fd292, fd290;
add.f64 %13, fd276, fd274;
sub.f64 %12, fd270, fd272;
add.f64 %15, fd286, fd284;
sub.f64 %14, fd280, fd282;
add.f64 %17, fd296, fd294;
sub.f64 %16, fd290, fd292;
})"
     // Operand lists: 18 outputs (%0..%17), then smem (%18), lut_dp_9_27
     // (%19) and 23 rmem inputs (%20..%42). Some .y inputs appear twice, so
     // the positional numbering used by the PTX depends on the exact order
     // below.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y), "=d"(rmem[5].x), "=d"(rmem[5].y), "=d"(rmem[6].x), "=d"(rmem[6].y), "=d"(rmem[7].x), "=d"(rmem[7].y), "=d"(rmem[8].x), "=d"(rmem[8].y): "r"(smem), "l"(lut_dp_9_27), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y), "d"(rmem[4].y), "d"(rmem[5].x), "d"(rmem[5].y), "d"(rmem[5].y), "d"(rmem[6].x), "d"(rmem[6].y), "d"(rmem[7].x), "d"(rmem[7].y), "d"(rmem[7].y), "d"(rmem[8].x), "d"(rmem[8].y));
};




// cufftdx_private_function<505, double, 1>
//
// Auto-generated inline-PTX kernel body. Judging by the include guard
// (CUFFTDX_FFT_27_FP64_FWD_PTX_HPP) this is presumably one variant of a
// forward, double-precision, 27-point FFT in which every thread owns nine
// complex values and three threads cooperate -- TODO confirm with the generator.
//
// Parameters:
//   rmem  Per-thread array of complex<double>. Elements 0..8 are read as the
//         inputs and elements 0..8 are overwritten with the results.
//   smem  32-bit base address used by the st.shared / ld.shared exchange.
//
// What the PTX does, in order:
//   1. Combines the nine inputs with butterflies that use the constants 0.5
//      (0d3FE0000000000000) and 0d3FEBB67AE8584CAA (~0.8660254, i.e. sqrt(3)/2)
//      plus a handful of fixed rotation constants (looks like a 9-point step
//      built from 3-point butterflies -- presumably; verify).
//   2. Splits %tid.x into q = tid.x / 3 and m = tid.x % 3 (the multiply by
//      0xAAAAAAAB followed by a 33-bit shift is the usual unsigned divide-by-3
//      sequence). Loads two complex factors from lut_dp_9_27, entries m and
//      m + 3 (16 bytes per entry), builds further factors from them by repeated
//      complex multiplication, and multiplies eight of the nine intermediate
//      values by those factors.
//   3. Exchanges data through shared memory. With base = smem + 216 * tid.y +
//      216 * q, each thread stores nine doubles contiguously at base + 72 * m and
//      reads nine doubles back from base + 8 * m with a 24-byte stride. The
//      x-derived (real) components go first, then the y-derived (imaginary)
//      components reuse the same locations.
//   4. Runs three more 3-point butterflies on the values read back and writes
//      the 18 results to the output operands.
//
// The PTX contains four `barrier.sync 0` instructions.
// NOTE(review): that implies every thread taking part in barrier 0 has to
// reach this call -- confirm against the call sites.
template<> __forceinline__ __device__ void cufftdx_private_function<505, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<12>;
.reg .f64 fd<297>;
.reg .b64 rd<7>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 216, r2;
add.f64 fd37, %28, %36;
add.f64 fd38, %20, fd37;
add.f64 fd39, %29, %37;
add.f64 fd40, %21, fd39;
mul.f64 fd41, fd37, 0d3FE0000000000000;
sub.f64 fd42, %20, fd41;
sub.f64 fd43, %29, %37;
mul.f64 fd44, fd43, 0d3FEBB67AE8584CAA;
add.f64 fd45, fd44, fd42;
sub.f64 fd46, fd42, fd44;
mul.f64 fd47, fd39, 0d3FE0000000000000;
sub.f64 fd48, %21, fd47;
sub.f64 fd49, %28, %36;
mul.f64 fd50, fd49, 0d3FEBB67AE8584CAA;
sub.f64 fd51, fd48, fd50;
add.f64 fd52, fd50, fd48;
add.f64 fd53, %30, %38;
add.f64 fd54, %22, fd53;
add.f64 fd55, %32, %40;
add.f64 fd56, %24, fd55;
mul.f64 fd57, fd53, 0d3FE0000000000000;
sub.f64 fd58, %22, fd57;
sub.f64 fd59, %32, %40;
mul.f64 fd60, fd59, 0d3FEBB67AE8584CAA;
add.f64 fd61, fd60, fd58;
sub.f64 fd62, fd58, fd60;
mul.f64 fd63, fd55, 0d3FE0000000000000;
sub.f64 fd64, %24, fd63;
sub.f64 fd65, %30, %38;
mul.f64 fd66, fd65, 0d3FEBB67AE8584CAA;
sub.f64 fd67, fd64, fd66;
add.f64 fd68, fd66, fd64;
add.f64 fd69, %33, %41;
add.f64 fd70, %25, fd69;
add.f64 fd71, %35, %42;
add.f64 fd72, %27, fd71;
mul.f64 fd73, fd69, 0d3FE0000000000000;
sub.f64 fd74, %25, fd73;
sub.f64 fd75, %35, %42;
mul.f64 fd76, fd75, 0d3FEBB67AE8584CAA;
add.f64 fd77, fd76, fd74;
sub.f64 fd78, fd74, fd76;
mul.f64 fd79, fd71, 0d3FE0000000000000;
sub.f64 fd80, %27, fd79;
sub.f64 fd81, %33, %41;
mul.f64 fd82, fd81, 0d3FEBB67AE8584CAA;
sub.f64 fd83, fd80, fd82;
add.f64 fd84, fd82, fd80;
mov.u32 r4, %tid.x;
mul.f64 fd85, fd61, 0d3FE8836FA2CF5039;
mul.f64 fd86, fd67, 0dBFE491B7523C161D;
sub.f64 fd87, fd85, fd86;
mul.f64 fd88, fd67, 0d3FE8836FA2CF5039;
fma.rn.f64 fd89, fd61, 0dBFE491B7523C161D, fd88;
mul.f64 fd90, fd77, 0d3FC63A1A7E0B738A;
mul.f64 fd91, fd83, 0dBFEF838B8C811C17;
sub.f64 fd92, fd90, fd91;
mul.f64 fd93, fd83, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd94, fd77, 0dBFEF838B8C811C17, fd93;
mul.f64 fd95, fd62, 0d3FC63A1A7E0B738A;
mul.f64 fd96, fd68, 0dBFEF838B8C811C17;
sub.f64 fd97, fd95, fd96;
mul.f64 fd98, fd68, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd99, fd62, 0dBFEF838B8C811C17, fd98;
mul.f64 fd100, fd78, 0dBFEE11F642522D1C;
mul.f64 fd101, fd84, 0dBFD5E3A8748A0BF5;
sub.f64 fd102, fd100, fd101;
mul.f64 fd103, fd84, 0dBFEE11F642522D1C;
fma.rn.f64 fd104, fd78, 0dBFD5E3A8748A0BF5, fd103;
add.f64 fd105, fd54, fd70;
add.f64 fd106, fd38, fd105;
add.f64 fd107, fd56, fd72;
add.f64 fd108, fd40, fd107;
mul.f64 fd109, fd105, 0d3FE0000000000000;
sub.f64 fd110, fd38, fd109;
sub.f64 fd111, fd56, fd72;
mul.f64 fd112, fd111, 0d3FEBB67AE8584CAA;
add.f64 fd113, fd112, fd110;
sub.f64 fd114, fd110, fd112;
mul.f64 fd115, fd107, 0d3FE0000000000000;
sub.f64 fd116, fd40, fd115;
sub.f64 fd117, fd54, fd70;
mul.f64 fd118, fd117, 0d3FEBB67AE8584CAA;
sub.f64 fd119, fd116, fd118;
add.f64 fd120, fd118, fd116;
add.f64 fd121, fd87, fd92;
add.f64 fd122, fd45, fd121;
add.f64 fd123, fd89, fd94;
add.f64 fd124, fd51, fd123;
mul.f64 fd125, fd121, 0d3FE0000000000000;
sub.f64 fd126, fd45, fd125;
sub.f64 fd127, fd89, fd94;
mul.f64 fd128, fd127, 0d3FEBB67AE8584CAA;
add.f64 fd129, fd128, fd126;
sub.f64 fd130, fd126, fd128;
mul.f64 fd131, fd123, 0d3FE0000000000000;
sub.f64 fd132, fd51, fd131;
sub.f64 fd133, fd87, fd92;
mul.f64 fd134, fd133, 0d3FEBB67AE8584CAA;
sub.f64 fd135, fd132, fd134;
add.f64 fd136, fd134, fd132;
add.f64 fd137, fd97, fd102;
add.f64 fd138, fd46, fd137;
add.f64 fd139, fd99, fd104;
add.f64 fd140, fd52, fd139;
mul.f64 fd141, fd137, 0d3FE0000000000000;
sub.f64 fd142, fd46, fd141;
sub.f64 fd143, fd99, fd104;
mul.f64 fd144, fd143, 0d3FEBB67AE8584CAA;
add.f64 fd145, fd144, fd142;
sub.f64 fd146, fd142, fd144;
mul.f64 fd147, fd139, 0d3FE0000000000000;
sub.f64 fd148, fd52, fd147;
sub.f64 fd149, fd97, fd102;
mul.f64 fd150, fd149, 0d3FEBB67AE8584CAA;
sub.f64 fd151, fd148, fd150;
add.f64 fd152, fd150, fd148;
mul.wide.u32 rd2, r4, -1431655765;
shr.u64 rd3, rd2, 33;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 3;
sub.s32 r7, r4, r6;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd153, fd154}, [rd6];
mul.f64 fd157, fd153, fd122;
mul.f64 fd158, fd154, fd124;
sub.f64 fd159, fd157, fd158;
mul.f64 fd160, fd153, fd124;
fma.rn.f64 fd161, fd154, fd122, fd160;
mul.f64 fd162, fd153, fd153;
mul.f64 fd163, fd154, fd154;
sub.f64 fd164, fd162, fd163;
mul.f64 fd165, fd154, fd153;
fma.rn.f64 fd166, fd154, fd153, fd165;
mul.f64 fd167, fd164, fd138;
mul.f64 fd168, fd166, fd140;
sub.f64 fd169, fd167, fd168;
mul.f64 fd170, fd164, fd140;
fma.rn.f64 fd171, fd166, fd138, fd170;
mul.f64 fd172, fd153, fd164;
mul.f64 fd173, fd154, fd166;
sub.f64 fd174, fd172, fd173;
mul.f64 fd175, fd153, fd166;
fma.rn.f64 fd176, fd154, fd164, fd175;
mul.f64 fd177, fd174, fd113;
mul.f64 fd178, fd176, fd119;
sub.f64 fd179, fd177, fd178;
mul.f64 fd180, fd174, fd119;
fma.rn.f64 fd181, fd176, fd113, fd180;
mul.f64 fd182, fd153, fd174;
mul.f64 fd183, fd154, fd176;
sub.f64 fd184, fd182, fd183;
mul.f64 fd185, fd153, fd176;
fma.rn.f64 fd186, fd154, fd174, fd185;
mul.f64 fd187, fd184, fd129;
mul.f64 fd188, fd186, fd135;
sub.f64 fd189, fd187, fd188;
mul.f64 fd190, fd184, fd135;
fma.rn.f64 fd191, fd186, fd129, fd190;
ld.global.v2.f64 {fd192, fd193}, [rd6+48];
mul.f64 fd196, fd192, fd145;
mul.f64 fd197, fd193, fd151;
sub.f64 fd198, fd196, fd197;
mul.f64 fd199, fd192, fd151;
fma.rn.f64 fd200, fd193, fd145, fd199;
mul.f64 fd201, fd153, fd192;
mul.f64 fd202, fd154, fd193;
sub.f64 fd203, fd201, fd202;
mul.f64 fd204, fd153, fd193;
fma.rn.f64 fd205, fd154, fd192, fd204;
mul.f64 fd206, fd203, fd114;
mul.f64 fd207, fd205, fd120;
sub.f64 fd208, fd206, fd207;
mul.f64 fd209, fd203, fd120;
fma.rn.f64 fd210, fd205, fd114, fd209;
mul.f64 fd211, fd153, fd203;
mul.f64 fd212, fd154, fd205;
sub.f64 fd213, fd211, fd212;
mul.f64 fd214, fd153, fd205;
fma.rn.f64 fd215, fd154, fd203, fd214;
mul.f64 fd216, fd213, fd130;
mul.f64 fd217, fd215, fd136;
sub.f64 fd218, fd216, fd217;
mul.f64 fd219, fd213, fd136;
fma.rn.f64 fd220, fd215, fd130, fd219;
mul.f64 fd221, fd153, fd213;
mul.f64 fd222, fd154, fd215;
sub.f64 fd223, fd221, fd222;
mul.f64 fd224, fd153, fd215;
fma.rn.f64 fd225, fd154, fd213, fd224;
mul.f64 fd226, fd223, fd146;
mul.f64 fd227, fd225, fd152;
sub.f64 fd228, fd226, fd227;
mul.f64 fd229, fd223, fd152;
fma.rn.f64 fd230, fd225, fd146, fd229;
mad.lo.s32 r8, r5, 216, r3;
barrier.sync 0;
mad.lo.s32 r9, r7, 72, r8;
st.shared.f64 [r9], fd106;
st.shared.f64 [r9+8], fd159;
st.shared.f64 [r9+16], fd169;
st.shared.f64 [r9+24], fd179;
st.shared.f64 [r9+32], fd189;
st.shared.f64 [r9+40], fd198;
st.shared.f64 [r9+48], fd208;
st.shared.f64 [r9+56], fd218;
st.shared.f64 [r9+64], fd228;
barrier.sync 0;
shl.b32 r10, r7, 6;
sub.s32 r11, r9, r10;
ld.shared.f64 fd231, [r11];
ld.shared.f64 fd232, [r11+24];
ld.shared.f64 fd233, [r11+48];
ld.shared.f64 fd234, [r11+72];
ld.shared.f64 fd235, [r11+96];
ld.shared.f64 fd236, [r11+120];
ld.shared.f64 fd237, [r11+144];
ld.shared.f64 fd238, [r11+168];
ld.shared.f64 fd239, [r11+192];
barrier.sync 0;
st.shared.f64 [r9], fd108;
st.shared.f64 [r9+8], fd161;
st.shared.f64 [r9+16], fd171;
st.shared.f64 [r9+24], fd181;
st.shared.f64 [r9+32], fd191;
st.shared.f64 [r9+40], fd200;
st.shared.f64 [r9+48], fd210;
st.shared.f64 [r9+56], fd220;
st.shared.f64 [r9+64], fd230;
barrier.sync 0;
ld.shared.f64 fd240, [r11];
ld.shared.f64 fd241, [r11+24];
ld.shared.f64 fd242, [r11+48];
ld.shared.f64 fd243, [r11+72];
ld.shared.f64 fd244, [r11+96];
ld.shared.f64 fd245, [r11+120];
ld.shared.f64 fd246, [r11+144];
ld.shared.f64 fd247, [r11+168];
ld.shared.f64 fd248, [r11+192];
add.f64 fd249, fd234, fd237;
add.f64 fd250, fd243, fd246;
mul.f64 fd251, fd249, 0d3FE0000000000000;
sub.f64 fd252, fd231, fd251;
sub.f64 fd253, fd243, fd246;
mul.f64 fd254, fd253, 0d3FEBB67AE8584CAA;
mul.f64 fd255, fd250, 0d3FE0000000000000;
sub.f64 fd256, fd240, fd255;
sub.f64 fd257, fd234, fd237;
mul.f64 fd258, fd257, 0d3FEBB67AE8584CAA;
add.f64 fd259, fd235, fd238;
add.f64 fd260, fd244, fd247;
mul.f64 fd261, fd259, 0d3FE0000000000000;
sub.f64 fd262, fd232, fd261;
sub.f64 fd263, fd244, fd247;
mul.f64 fd264, fd263, 0d3FEBB67AE8584CAA;
mul.f64 fd265, fd260, 0d3FE0000000000000;
sub.f64 fd266, fd241, fd265;
sub.f64 fd267, fd235, fd238;
mul.f64 fd268, fd267, 0d3FEBB67AE8584CAA;
add.f64 fd269, fd236, fd239;
add.f64 fd270, fd245, fd248;
mul.f64 fd271, fd269, 0d3FE0000000000000;
sub.f64 fd272, fd233, fd271;
sub.f64 fd273, fd245, fd248;
mul.f64 fd274, fd273, 0d3FEBB67AE8584CAA;
mul.f64 fd275, fd270, 0d3FE0000000000000;
sub.f64 fd276, fd242, fd275;
sub.f64 fd277, fd236, fd239;
mul.f64 fd278, fd277, 0d3FEBB67AE8584CAA;
add.f64 %0, fd231, fd249;
add.f64 %1, fd240, fd250;
add.f64 %2, fd232, fd259;
add.f64 %3, fd241, fd260;
add.f64 %4, fd233, fd269;
add.f64 %5, fd242, fd270;
add.f64 %6, fd254, fd252;
sub.f64 %7, fd256, fd258;
add.f64 %8, fd264, fd262;
sub.f64 %9, fd266, fd268;
add.f64 %10, fd274, fd272;
sub.f64 %11, fd276, fd278;
sub.f64 %12, fd252, fd254;
add.f64 %13, fd258, fd256;
sub.f64 %14, fd262, fd264;
add.f64 %15, fd268, fd266;
sub.f64 %16, fd272, fd274;
add.f64 %17, fd278, fd276;
})"
     // Operand map: %0..%17 are the outputs rmem[0..8].{x,y}; %18 is smem;
     // %19 is the address of lut_dp_9_27; %20 onwards are the rmem inputs.
     // Several .y inputs appear twice in the list (rmem[1].y is both %23 and %24,
     // likewise for elements 2, 4, 5 and 7) and the PTX above references only the
     // second copy, so the list cannot be deduplicated without renumbering every
     // %N reference in the string.
     // NOTE(review): no "memory" clobber is declared although the asm reads and
     // writes shared memory -- confirm this is intended.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y), "=d"(rmem[5].x), "=d"(rmem[5].y), "=d"(rmem[6].x), "=d"(rmem[6].y), "=d"(rmem[7].x), "=d"(rmem[7].y), "=d"(rmem[8].x), "=d"(rmem[8].y): "r"(smem), "l"(lut_dp_9_27), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y), "d"(rmem[4].y), "d"(rmem[5].x), "d"(rmem[5].y), "d"(rmem[5].y), "d"(rmem[6].x), "d"(rmem[6].y), "d"(rmem[7].x), "d"(rmem[7].y), "d"(rmem[7].y), "d"(rmem[8].x), "d"(rmem[8].y));
};




// cufftdx_private_function<507, double, 1>
//
// Auto-generated inline-PTX kernel body. Judging by the include guard
// (CUFFTDX_FFT_27_FP64_FWD_PTX_HPP) this is presumably a variant of the
// forward, double-precision, 27-point FFT in which every thread owns three
// complex values and nine threads cooperate -- TODO confirm with the generator.
//
// Parameters:
//   rmem  Per-thread array of complex<double>. Elements 0..2 are read as the
//         inputs and elements 0..2 are overwritten with the results.
//   smem  32-bit base address used by the st.shared / ld.shared exchanges.
//
// What the PTX does, in order:
//   1. A 3-point butterfly on the inputs, using 0.5 (0d3FE0000000000000) and
//      0d3FEBB67AE8584CAA (~0.8660254, i.e. sqrt(3)/2).
//   2. Splits %tid.x into q = tid.x / 9 and m = tid.x % 9 (multiply by
//      954437177 = 0x38E38E39 and shift by 33 is the usual unsigned divide-by-9
//      sequence). Multiplies the second and third butterfly outputs by the
//      complex entries m and m + 9 of lut_dp_3_27 (16 bytes per entry).
//   3. First exchange, with base = smem + 216 * tid.y + 216 * q: stores three
//      doubles at base + 24 * m and reads three back from base + 8 * m at
//      offsets 0 / 72 / 144. The x-derived (real) components go first, then the
//      y-derived (imaginary) components reuse the same locations.
//   4. A second 3-point butterfly, then multiplication by entries g and g + 3
//      of lut_dp_3_9, where g = m / 3 and j = m % 3.
//   5. Second exchange: stores at base + 8 * j + 72 * g with a 24-byte stride
//      and reads back from base + 8 * m at offsets 0 / 72 / 144, again real
//      components first and imaginary components second.
//   6. A final 3-point butterfly whose six results go to the output operands.
//
// The PTX contains eight `barrier.sync 0` instructions.
// NOTE(review): that implies every thread taking part in barrier 0 has to
// reach this call -- confirm against the call sites.
template<> __forceinline__ __device__ void cufftdx_private_function<507, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<18>;
.reg .f64 fd<109>;
.reg .b64 rd<12>;
mov.u32 r1, %tid.y;
mov.u32 r2, %6;
mad.lo.s32 r3, r1, 216, r2;
mov.u32 r4, %tid.x;
add.f64 fd13, %11, %14;
add.f64 fd14, %9, fd13;
add.f64 fd15, %13, %15;
add.f64 fd16, %10, fd15;
mul.f64 fd17, fd13, 0d3FE0000000000000;
sub.f64 fd18, %9, fd17;
sub.f64 fd19, %13, %15;
mul.f64 fd20, fd19, 0d3FEBB67AE8584CAA;
add.f64 fd21, fd20, fd18;
sub.f64 fd22, fd18, fd20;
mul.f64 fd23, fd15, 0d3FE0000000000000;
sub.f64 fd24, %10, fd23;
sub.f64 fd25, %11, %14;
mul.f64 fd26, fd25, 0d3FEBB67AE8584CAA;
sub.f64 fd27, fd24, fd26;
add.f64 fd28, fd26, fd24;
mul.wide.u32 rd2, r4, 954437177;
shr.u64 rd3, rd2, 33;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 9;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 216, r3;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %7;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd29, fd30}, [rd6];
mul.f64 fd33, fd29, fd21;
mul.f64 fd34, fd30, fd27;
sub.f64 fd35, fd33, fd34;
mul.f64 fd36, fd29, fd27;
fma.rn.f64 fd37, fd30, fd21, fd36;
ld.global.v2.f64 {fd38, fd39}, [rd6+144];
mul.f64 fd42, fd38, fd22;
mul.f64 fd43, fd39, fd28;
sub.f64 fd44, fd42, fd43;
mul.f64 fd45, fd38, fd28;
fma.rn.f64 fd46, fd39, fd22, fd45;
barrier.sync 0;
mad.lo.s32 r9, r7, 24, r8;
st.shared.f64 [r9], fd14;
st.shared.f64 [r9+8], fd35;
st.shared.f64 [r9+16], fd44;
barrier.sync 0;
shl.b32 r10, r7, 4;
sub.s32 r11, r9, r10;
ld.shared.f64 fd47, [r11];
ld.shared.f64 fd48, [r11+72];
ld.shared.f64 fd49, [r11+144];
barrier.sync 0;
st.shared.f64 [r9], fd16;
st.shared.f64 [r9+8], fd37;
st.shared.f64 [r9+16], fd46;
barrier.sync 0;
ld.shared.f64 fd50, [r11];
ld.shared.f64 fd51, [r11+72];
ld.shared.f64 fd52, [r11+144];
add.f64 fd53, fd48, fd49;
add.f64 fd54, fd47, fd53;
add.f64 fd55, fd51, fd52;
add.f64 fd56, fd50, fd55;
mul.f64 fd57, fd53, 0d3FE0000000000000;
sub.f64 fd58, fd47, fd57;
sub.f64 fd59, fd51, fd52;
mul.f64 fd60, fd59, 0d3FEBB67AE8584CAA;
add.f64 fd61, fd60, fd58;
sub.f64 fd62, fd58, fd60;
mul.f64 fd63, fd55, 0d3FE0000000000000;
sub.f64 fd64, fd50, fd63;
sub.f64 fd65, fd48, fd49;
mul.f64 fd66, fd65, 0d3FEBB67AE8584CAA;
sub.f64 fd67, fd64, fd66;
add.f64 fd68, fd66, fd64;
mul.wide.u32 rd7, r7, -1431655765;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 3;
sub.s32 r14, r7, r13;
shl.b32 r15, r14, 3;
add.s32 r16, r8, r15;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %8;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd69, fd70}, [rd11];
mul.f64 fd73, fd69, fd61;
mul.f64 fd74, fd70, fd67;
sub.f64 fd75, fd73, fd74;
mul.f64 fd76, fd69, fd67;
fma.rn.f64 fd77, fd70, fd61, fd76;
ld.global.v2.f64 {fd78, fd79}, [rd11+48];
mul.f64 fd82, fd78, fd62;
mul.f64 fd83, fd79, fd68;
sub.f64 fd84, fd82, fd83;
mul.f64 fd85, fd78, fd68;
fma.rn.f64 fd86, fd79, fd62, fd85;
barrier.sync 0;
mad.lo.s32 r17, r12, 72, r16;
st.shared.f64 [r17], fd54;
st.shared.f64 [r17+24], fd75;
st.shared.f64 [r17+48], fd84;
barrier.sync 0;
ld.shared.f64 fd87, [r11];
ld.shared.f64 fd88, [r11+72];
ld.shared.f64 fd89, [r11+144];
barrier.sync 0;
st.shared.f64 [r17], fd56;
st.shared.f64 [r17+24], fd77;
st.shared.f64 [r17+48], fd86;
barrier.sync 0;
ld.shared.f64 fd90, [r11];
ld.shared.f64 fd91, [r11+72];
ld.shared.f64 fd92, [r11+144];
add.f64 fd93, fd88, fd89;
add.f64 fd94, fd91, fd92;
mul.f64 fd95, fd93, 0d3FE0000000000000;
sub.f64 fd96, fd87, fd95;
sub.f64 fd97, fd91, fd92;
mul.f64 fd98, fd97, 0d3FEBB67AE8584CAA;
mul.f64 fd99, fd94, 0d3FE0000000000000;
sub.f64 fd100, fd90, fd99;
sub.f64 fd101, fd88, fd89;
mul.f64 fd102, fd101, 0d3FEBB67AE8584CAA;
add.f64 %0, fd87, fd93;
add.f64 %1, fd90, fd94;
add.f64 %2, fd98, fd96;
sub.f64 %3, fd100, fd102;
sub.f64 %4, fd96, fd98;
add.f64 %5, fd102, fd100;
})"
     // Operand map: %0..%5 are the outputs rmem[0..2].{x,y}; %6 is smem; %7 and
     // %8 are the addresses of lut_dp_3_27 and lut_dp_3_9; %9 onwards are the
     // rmem inputs. rmem[1].y is listed twice (%12 and %13) and the PTX above
     // references only %13, so the list cannot be deduplicated without
     // renumbering the %N references in the string.
     // NOTE(review): no "memory" clobber is declared although the asm reads and
     // writes shared memory -- confirm this is intended.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y): "r"(smem), "l"(lut_dp_3_27), "l"(lut_dp_3_9), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y));
};




// cufftdx_private_function<508, double, 1>
//
// Auto-generated inline-PTX kernel body. Judging by the include guard
// (CUFFTDX_FFT_27_FP64_FWD_PTX_HPP) this is presumably a variant of the
// forward, double-precision, 27-point FFT in which every thread owns three
// complex values and nine threads cooperate -- TODO confirm with the generator.
// Unlike specialization 507 in this file, the shared-memory exchange here
// moves whole complex values with 16-byte v2.f64 accesses, so each exchange
// is a single store/load pass.
//
// Parameters:
//   rmem  Per-thread array of complex<double>. Elements 0..2 are read as the
//         inputs and elements 0..2 are overwritten with the results.
//   smem  32-bit base address used by the st.shared / ld.shared exchanges.
//
// What the PTX does, in order:
//   1. A 3-point butterfly on the inputs, using 0.5 (0d3FE0000000000000) and
//      0d3FEBB67AE8584CAA (~0.8660254, i.e. sqrt(3)/2).
//   2. Splits %tid.x into q = tid.x / 9 and m = tid.x % 9 (multiply by
//      954437177 = 0x38E38E39 and shift by 33 is the usual unsigned divide-by-9
//      sequence). Multiplies the second and third butterfly outputs by the
//      complex entries m and m + 9 of lut_dp_3_27 (16 bytes per entry).
//   3. First exchange, with base = smem + 432 * tid.y + 432 * q: stores three
//      complex values at base + 48 * m and reads three back from base + 16 * m
//      at offsets 0 / 144 / 288.
//   4. A second 3-point butterfly, then multiplication by entries g and g + 3
//      of lut_dp_3_9, where g = m / 3 and j = m % 3.
//   5. Second exchange: stores at base + 16 * j + 144 * g with a 48-byte stride
//      and reads back from base + 16 * m at offsets 0 / 144 / 288.
//   6. A final 3-point butterfly whose six results go to the output operands.
//
// The PTX contains four `barrier.sync 0` instructions.
// NOTE(review): that implies every thread taking part in barrier 0 has to
// reach this call -- confirm against the call sites.
template<> __forceinline__ __device__ void cufftdx_private_function<508, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<18>;
.reg .f64 fd<121>;
.reg .b64 rd<12>;
mov.u32 r1, %tid.y;
mov.u32 r2, %6;
mad.lo.s32 r3, r1, 432, r2;
mov.u32 r4, %tid.x;
add.f64 fd13, %11, %14;
add.f64 fd14, %13, %15;
mul.f64 fd15, fd13, 0d3FE0000000000000;
sub.f64 fd16, %9, fd15;
sub.f64 fd17, %13, %15;
mul.f64 fd18, fd17, 0d3FEBB67AE8584CAA;
add.f64 fd19, fd18, fd16;
sub.f64 fd20, fd16, fd18;
mul.f64 fd21, fd14, 0d3FE0000000000000;
sub.f64 fd22, %10, fd21;
sub.f64 fd23, %11, %14;
mul.f64 fd24, fd23, 0d3FEBB67AE8584CAA;
sub.f64 fd25, fd22, fd24;
add.f64 fd26, fd24, fd22;
mul.wide.u32 rd2, r4, 954437177;
shr.u64 rd3, rd2, 33;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 9;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 432, r3;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %7;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd27, fd28}, [rd6];
mul.f64 fd31, fd27, fd19;
mul.f64 fd32, fd28, fd25;
mul.f64 fd33, fd27, fd25;
ld.global.v2.f64 {fd34, fd35}, [rd6+144];
mul.f64 fd38, fd34, fd20;
mul.f64 fd39, fd35, fd26;
mul.f64 fd40, fd34, fd26;
barrier.sync 0;
mad.lo.s32 r9, r7, 48, r8;
add.f64 fd41, %10, fd14;
add.f64 fd42, %9, fd13;
st.shared.v2.f64 [r9], {fd42, fd41};
fma.rn.f64 fd43, fd28, fd19, fd33;
sub.f64 fd44, fd31, fd32;
st.shared.v2.f64 [r9+16], {fd44, fd43};
fma.rn.f64 fd45, fd35, fd20, fd40;
sub.f64 fd46, fd38, fd39;
st.shared.v2.f64 [r9+32], {fd46, fd45};
barrier.sync 0;
shl.b32 r10, r7, 5;
sub.s32 r11, r9, r10;
ld.shared.v2.f64 {fd47, fd48}, [r11];
ld.shared.v2.f64 {fd51, fd52}, [r11+144];
ld.shared.v2.f64 {fd55, fd56}, [r11+288];
add.f64 fd59, fd51, fd55;
add.f64 fd60, fd52, fd56;
mul.f64 fd61, fd59, 0d3FE0000000000000;
sub.f64 fd62, fd47, fd61;
sub.f64 fd63, fd52, fd56;
mul.f64 fd64, fd63, 0d3FEBB67AE8584CAA;
add.f64 fd65, fd64, fd62;
sub.f64 fd66, fd62, fd64;
mul.f64 fd67, fd60, 0d3FE0000000000000;
sub.f64 fd68, fd48, fd67;
sub.f64 fd69, fd51, fd55;
mul.f64 fd70, fd69, 0d3FEBB67AE8584CAA;
sub.f64 fd71, fd68, fd70;
add.f64 fd72, fd70, fd68;
mul.wide.u32 rd7, r7, -1431655765;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 3;
sub.s32 r14, r7, r13;
shl.b32 r15, r14, 4;
add.s32 r16, r8, r15;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %8;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd73, fd74}, [rd11];
mul.f64 fd77, fd73, fd65;
mul.f64 fd78, fd74, fd71;
mul.f64 fd79, fd73, fd71;
ld.global.v2.f64 {fd80, fd81}, [rd11+48];
mul.f64 fd84, fd80, fd66;
mul.f64 fd85, fd81, fd72;
mul.f64 fd86, fd80, fd72;
barrier.sync 0;
mad.lo.s32 r17, r12, 144, r16;
add.f64 fd87, fd48, fd60;
add.f64 fd88, fd47, fd59;
st.shared.v2.f64 [r17], {fd88, fd87};
fma.rn.f64 fd89, fd74, fd65, fd79;
sub.f64 fd90, fd77, fd78;
st.shared.v2.f64 [r17+48], {fd90, fd89};
fma.rn.f64 fd91, fd81, fd66, fd86;
sub.f64 fd92, fd84, fd85;
st.shared.v2.f64 [r17+96], {fd92, fd91};
barrier.sync 0;
ld.shared.v2.f64 {fd93, fd94}, [r11];
ld.shared.v2.f64 {fd97, fd98}, [r11+144];
ld.shared.v2.f64 {fd101, fd102}, [r11+288];
add.f64 fd105, fd97, fd101;
add.f64 fd106, fd98, fd102;
mul.f64 fd107, fd105, 0d3FE0000000000000;
sub.f64 fd108, fd93, fd107;
sub.f64 fd109, fd98, fd102;
mul.f64 fd110, fd109, 0d3FEBB67AE8584CAA;
mul.f64 fd111, fd106, 0d3FE0000000000000;
sub.f64 fd112, fd94, fd111;
sub.f64 fd113, fd97, fd101;
mul.f64 fd114, fd113, 0d3FEBB67AE8584CAA;
add.f64 %1, fd94, fd106;
add.f64 %0, fd93, fd105;
sub.f64 %3, fd112, fd114;
add.f64 %2, fd110, fd108;
add.f64 %5, fd114, fd112;
sub.f64 %4, fd108, fd110;
})"
     // Operand map: %0..%5 are the outputs rmem[0..2].{x,y}; %6 is smem; %7 and
     // %8 are the addresses of lut_dp_3_27 and lut_dp_3_9; %9 onwards are the
     // rmem inputs. rmem[1].y is listed twice (%12 and %13) and the PTX above
     // references only %13, so the list cannot be deduplicated without
     // renumbering the %N references in the string.
     // NOTE(review): no "memory" clobber is declared although the asm reads and
     // writes shared memory -- confirm this is intended.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y): "r"(smem), "l"(lut_dp_3_27), "l"(lut_dp_3_9), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y));
};


#endif
